import org.apache.spark.sql.SparkSession

/** Small Spark demo: de-duplicates an in-memory RDD of (Char, Int) pairs and prints the result. */
object Distinct {

  /**
   * Creates (or reuses) a local SparkSession, removes duplicate pairs from a
   * small hard-coded RDD and prints the remaining elements on one line.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("Test")
      .master("local[*]")
      .getOrCreate()

    try {
      val sc = spark.sparkContext

      val rdd = sc.makeRDD(List(('a', 1), ('a', 1), ('b', 1), ('c', 1)))

      // collect() yields an Array; passing it straight to println would print
      // only the JVM array reference (e.g. "[Lscala.Tuple2;@1b2c3d"), so the
      // elements are rendered explicitly with mkString.
      val distinctPairs = rdd.distinct().collect()
      println(distinctPairs.mkString(", "))
    } finally {
      // Always release the Spark resources, even if the job above fails.
      spark.stop()
    }
  }
}
